Prepare computational envirinment:
#set global code chunk parameters for knitting:
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
#load libraries (install if absent):
pacman::p_load(tidyverse, #various tools
here, #easy file paths
forcats, #category wrangling
countrycode, #country codes and continents
rstatix, #statistical tests
DT, #data tables
knitr, #document knitting
ggimage, #adding images
ggcharts, #plots functionality
ggmosaic, #mosaic plots
scales, #plots extra
readxl, #loading Excel
forcats, #category wrangling
ggupset, #upset plots
patchwork, #plot assembly
stringr, #text wrangling
ggbeeswarm, #beeswarm plots
tidytext, #text processing
stopwords, #text processing
tokenizers, #text processing
ggpattern, #patterning for plots
wacolors, #color-blind friendly color palettes
lme4, #linear models
sjPlot) #for tab_model reporting
## may need:
# install.packages("remotes")
# remotes::install_github("coolbutuseless/ggpattern")
library(ggpattern)
## set a global plotting theme with background transparency:
theme_set(theme_classic(base_size = 14) +
theme(panel.background = element_blank(),
plot.background = element_blank(),
legend.background = element_blank(),
text = element_text(colour = "white")))
#color-blind friendly color palettes: https://cran.r-project.org/web/packages/khroma/vignettes/tol.html#lightLoad data and an prepare associated meta-data table:
# accessing all the sheets
#sheet_names <- excel_sheets(here("data", "scimagojr 2021 Subject Areas.xlsx"))
dat <- read.csv(here("data", "MASTER EcoEvo_membership_fees_v1 - reconciled_data_final.csv"))
dim(dat)
#names(dat)
dat <- dat %>% select(-starts_with('extractor')) #remove columns with extractor names
#View(dat)
#create a meta-data table with rows from current columns names:
dat_meta <- tibble("original column name" = names(dat),
"description" = c("Full name of the society - (the name from the master list, do not include or add abbreviated name).",
"Society info source - Main source of society information (usually, the main webpage address of the society).",
"Society base country - Country where society has been originally established / registered or has headquarters. Note: It will usually match the currency in which membership is paid, so if e.g. headquarters/chapters are in more than one country, only enter the country that matches the payment currency. Use the following abbreviations: USA and UK; for all other countries use the full name of the country.",
"Society type: International by name - (society name includes “International”, or continent, or a broad region, or an equivalent term); National by name - (society name includes the name of its country of origin, e.g. Japanese, Indian, British, American or equivalent); International by chapter - (society claims to have international chapters / branches / sections, i.e. in other countries or regions than the original country, e.g. Ecological Society of America has a Latin America and the Caribbean Chapter); International by aims or scope of activities - (select if none of the other options fit - i.e. only select this one if you did not tick any of the other boxes)",
"EDI (Equity, Diversity, Inclusion) statement present - Does the society have an EDI (Equity, Diversity, Inclusion) statement on the website or policy documents?",
"EDI (Equity, Diversity, Inclusion) structures present - Does the society have an EDI (Equity, Diversity, Inclusion) structure (e.g, a dedicated committee, section, or an officer)?",
"EDI comment - Note or copy and paste any relevant information on EDI statement or structure (e.g., where it can be found).",
"Membership fees source - Membership fees source of information (usually, a sub-page or a document). Ideally, copy and paste a link to an online page/document with information on membership fees.",
"Record currency of society fees. Use USD, if provided. Use ISO 4217 currency codes (e.g., USD, EUR, AUD).",
"Standard individual regular membership fee per year - Only record the number, in the currency used by the society. If necessary, divide by the number of years the fee covers (e.g., for 3-year membership divide the fee by 3). For free membership, record 0. If information or a given fee type is not available, leave it empty. If multiple levels of regular fees are available (e.g., depending on country / region / income / mode of payment), record the highest one and add a comment below. Exception: if there is a lower fee without a mailed printed copy of a journal, select this online-only subscription fee category instead of a fee with a printed copy (here we assume it is not a significant benefit worth paying a higher fee and most regular members would be happy with online access only).",
"Comment on the standard individual regular membership fee per year - Any comments, e.g. the name of the membership category used on the website, you can also copy and paste relevant text.",
"Standard individual student membership fee per year - Only record the number, in the currency used by the society. If necessary, divide by the number of years the fee covers (e.g., for 3-year membership divide the fee by 3). For free membership record 0. If information or a given fee type is not available, leave empty. If multiple levels of student fees are available (e.g., depending on country / region / income), record the highest one and add a comment below. Exception: if there is a lower fee without a mailed printed copy of a journal, select this online-only subscription fee category instead of a fee with a printed copy (here we assume it is not a significant benefit worth paying a higher fee and most student members would be happy with online access only).",
"Comment on the standard individual student membership fee per year - Any comments, e.g. the name of the membership category used on the website, you can also copy and paste relevant text.",
"Standard individual postdoc membership fee per year - Only record the number, in the currency used by the society. If necessary, divide by the number of years the fee covers (e.g., for 3-year membership divide the fee by 3). For free membership record 0. If information or a given fee type is not available, leave empty. If multiple levels of postdoc fees are available (e.g., depending on country / region / income), record the highest one and add a comment below. Exception: if there is a lower fee without a mailed printed copy of a journal, select this online-only subscription fee category instead of a fee with a printed copy (here we assume it is not a significant benefit worth paying a higher fee and most postdoc members would be happy with online access only).",
"Comment on the standard individual postdoc membership fee per year - Any comments, e.g. the name of the membership category used on the website, you can also copy and paste relevant text.",
"Eligibility timeframe for standard individual postdoc membership fee - Only record the number of years representing either the number of years after PhD award when this fee category can be applied (e.g., within 2 years after PhD) or for how many years the fee category can be used (e.g., can be used for a maximum of 2 years). If no postdoc fees or no information on the timeframe, leave empty.",
"Comment on the eligibility timeframe for standard individual postdoc membership - Any comments, you can also copy and paste relevant text.",
"Discounted fees available for individual members from some countries or regions - Select YES if the description mentions any discounts based on researcher location/affiliation. - Select NO if the description does not mention any discounts based on researcher location/affiliation (in the next question, you can copy and paste relevant text or make a note).",
"Countries or regions eligible for discounted/waived fees - Copy and paste from society documents (e.g., low-income countries, Global South, specific country names)",
"Comment on countries or regions eligible for discounted/waived fees - Any additional comments (e.g., multiple discount levels, or additional conditions such as a limit on the number of years with discount)",
"Increased fees available for individual members from some countries or regions (e.g. outside society’s home country) - Select YES if the description mentions any fee increase based on researcher location/affiliation. - Select NO if the award description does not mention any fee increase based on researcher location/affiliation (in the next question you can copy and paste relevant text or make a note).",
"Countries or regions eligible for increased fees - Copy and paste from society documents (e.g., low-income countries, Global South, specific country names).",
"Comment on countries or regions eligible for increased fees - Copy and paste from society documents (e.g., any foreign countries, developed countries, high-income countries, specific country names).",
"Discounted individual membership fees available for the following groups, as stated in the membership information. More than one choice is possible. - STUDENT includes university students at any level (undergraduate, postgraduate). - POSTDOC includes early career researchers (ECR) after PhD (excluding students). - JUNIOR includes pre-university students (e.g., high school). - NON-ACADEMIC SPECIALIST includes educators / outreach / communication and similar professionals. - FEES PROPORTIONAL TO INCOME BRACKETS - here we mean only personal/individual income (salary) not the country-level income or development status. - ignore lifetime memberships (do not code them as OTEHR).",
"Comment on groups eligible for discounted fees - Any comments on the above categories (e.g., what are the “other” discounts ps not captured above, time limits on discounts).",
"Complete or partial individual membership fee waivers available on individual request - Code YES if additional individual-based fee waivers/discounts available on request (e.g., due to any special circumstances). In the next question you can copy and paste relevant text from the award document or make a note if no such document/information is available.",
"Comment on individual requests for discounted fees or waivers - Wording of the eligibility criteria in relation to individual waivers or discounts (e.g., application procedure or no questions asked fee waiver).",
"Voluntary donations not linked to membership application - Code YES if society explicitly accepts or asks for such donations (e.g., on top of membership fee, or as a separate payment). This includes only donations that do not result in the membership status and do not come with any other direct benefits to the donating person, such as subscriptions, website access, etc.; donors names being listed somewhere are ok.",
"Comment on voluntary donations not linked to membership application - You can note anything relevant or unclear regarding donations.",
"Individual full membership benefits - select all applicable benefits for full/regular/standard members (excluding voting rights, volunteering etc.), as stated or inferred from the society website/documents. Focus on what s listed on the page advertising membership, you do not need to search the whole website to collect all activities society provides.",
"Comment on society membership benefits - Copy and paste from the website/documents and add any relevant notes on the society membership benefits (e.g., define OTHER, cannot find explicit information, no information / not clear what the benefits are, add any comments on special conditions and restrictions).",
"Comments_general - Add any other notes and comments on issues, assumptions, or seeking additional information, for a given society in general.",
"Whether given data row should be potentially removed from analyses due to society deemed as irrelevant to ecology and evolutionary biology during extraction stage.",
"Whether given data row should be potentially removed from analyses due to society not providing any extractable information on their membership fees.",
"Whether membership fees are provided in more than one currency on their website/documents."),
"data type [options]" = c("Singular variable: text",
"Singular variable: link",
"Singular variable: text",
"Plural variable: International by name / National by name / International by chapter / International by aims or scope of activities",
"Singular variable: yes / no",
"Singular variable: yes / no",
"Singular variable: text",
"Singular variable: link",
"Singular variable: text",
"Singular variable: number",
"Singular variable: text",
"Singular variable: number",
"Singular variable: text",
"Singular variable: number",
"Singular variable: text",
"Singular variable: number",
"Singular variable: text",
"Singular variable: yes / no",
"Singular variable: text",
"Singular variable: text",
"Singular variable: yes / no",
"Singular variable: text",
"Singular variable: text",
"Plural variable: student / postdoc / ECR (excluding students) / retired/emeritus / unemployed / employed part-time / junior / family / non-academic specialists / general community/public / fees proportional to income brackets / discretionary fee amount / no fees / other",
"Singular variable: text",
"Singular variable: yes / no",
"Singular variable: text",
"Singular variable: yes / no",
"Singular variable: text",
"Plural variable: Conference registration discount or waiver / Funding (e.g., travel awards/grants, research funding, prizes) / Journal subscription discount or waiver / Publication fees (APC) discount or waiver / Networking or professional development (e.g., membership platform, mentoring, exclusive webinars, workshops, training courses) / Other",
"Singular variable: text",
"Singular variable: text",
"Binary variable: 1 / 0",
"Binary variable: 1 / 0",
"Binary variable: 1 / 0"
))Meta-data for the extracted data on membership fees of ecological and evolutionary societies (matches Table S2) .
#dim(dat)
#Rename selected variables (columns) to shorter names for analyses
dat <- dat %>% rename( society = "Full.name.of.the.society",
society_link = "Society.info.source",
society_country = "Society.base.country",
society_type = "Society.type",
society_EDI_statement = "EDI..Equity..Diversity..Inclusion..statement.present",
society_EDI_structure = "EDI..Equity..Diversity..Inclusion..structure.present",
society_EDI_comment = "EDI.comment",
fees_info_source = "Fees.info.source",
fees_currency = "Currency.of.society.fees",
fee_regular = "Standard.individual.regular.membership.fee.per.year",
fee_regular_comment = "Comment.on.the.standard.individual.regular.membership.fee.per.year",
fee_student = "Standard.individual.student.membership.fee.per.year",
fee_student_comment = "Comment.on.the.standard.individual.student.membership.fee.per.year",
fee_postdoc = "Standard.individual.postdoc.membership.fee.per.year",
fee_postdoc_comment = "Comment.on.the.standard.individual.postdoc.membership.fee.per.year",
postdoc_eligibiity_years = "Eligibility.timeframe.for.standard.individual.postdoc.membership.fee",
postdoc_eligibiity_comment = "Comment.on.the.eligibility.timeframe.for.standard.individual.postdoc.membership.fee",
countries_payless = "Discounted.fees.available.for.individual.members.from.some.countries.or.regions",
countries_payless_which = "Countries.or.regions.eligible.for.discounted.waived.fees",
countries_payless_comment = "Comment.on.countries.or.regions.eligible.for.discounted.waived.fees",
countries_paymore = "Increased.fees.imposed.on.individual.members.from.some.countries.or.regions..e.g..outside.society.s.home.country.",
countries_paymore_which = "Countries.or.regions.eligible.for.increased.fees",
countries_paymore_comment = "Comment.on.countries.or.regions.eligible.for.increased.fees",
discount_types = "Discounted.individual.membership.fees.available.for.the.following.groups",
discount_types_comment = "Comment.on.groups.eligible.for.discounted.fees",
individual_waivers = "Complete.or.partial.individual.membership.fee.waivers.available.on.individual.request",
individual_waivers_comment = "Comment.on.individual.requests.for.discounted.fees.or.waivers",
voluntary_donations = "Voluntary.donations.not.linked.to.membership.application",
voluntary_donations_comment = "Comment.on.voluntary.donations.not.linked.to.membership.application",
benefits_types = "Individual.full.membership.benefits",
benefits_types_comment = "Comment.on.society.membership.benefits",
comments_general = "Comments_general",
censor_irrelevant = "Censor.irrelevant",
censor_noinfo = "Censor.noinfo",
multiple_currencies = "Multiple.currencies"
)
#names(dat)
# sum(dat$censor_irrelevant) #2 rows with non-eligible societies
#Remove 2 rows with ineligible societies using censor_irrelevant column:
dat <- subset(dat, censor_irrelevant == 0) #subsetting the data frameFind and remove societies without published fees information:
# sum(dat$censor_noinfo) #9 societies without fees information
#show names of societies with no info
dat %>% filter(censor_noinfo == 1) %>% select(society)## society
## 1 Asian Society of Vector Ecology
## 2 Australasian Evolution Society
## 3 Gazi Entomological Research Society
## 4 International Network for the Study of Asian Ants
## 5 International Society for Systems Biology
## 6 Iranian Society of Ichthyology
## 7 Romanian Society of Palaeontologists
## 8 Sociedad Latinoamericana de Briología
## 9 Society for Vector Ecology
#Remove 9 rows with ineligible societies using censor_noinfo column:
dat <- subset(dat, censor_noinfo == 0) #subsetting the data frame
#dim(dat)Find and remove societies with free membership for everyone:
## [1] "European Ornithologists' Union"
## [2] "European Pond Conservation Network"
## [3] "International Association for Ecology"
## [4] "International Council for the Exploration of the Sea"
#Remove 4 rows with ineligible societies using censor_noinfo column:
dat <- subset(dat, fee_regular != 0) #subsetting the data frame
#dim(dat) #169 remainingNumber of societies remaining for analyses: 169, 35.
##Individual full membership, student, and postdoctoral fees
Recalculate values into USD for fee_regular, fee_student, fee_postdoc:
# Create dataframe with exchange rates from GoogleFinance (2024/02/23)
rates_df = data.frame(fees_currency = c('ARS','AUD','CHF','CZK','EUR','GBP','HUF','INR','JPY','KES','KRW','NOK','NZD','PHP','PLN','RON','THB','USD','ZAR'),
exchange_rate_USD = c(0.0012, 0.6569, 1.1354, 0.0427, 1.0828, 1.267, 0.0028, 0.0121, 0.0066, 0.0069, 0.0008, 0.0952, 0.6197, 0.0179, 0.2502, 0.2176, 0.0277, 1, 0.0519))
#use left_joint to add to the main data set
dat <- left_join(dat, rates_df, by = c("fees_currency" = "fees_currency"))
#recalculate fee_regular, fee_student, fee_postdoc into USD
dat <- dat %>% mutate(fee_regular_USD = fee_regular * exchange_rate_USD,
fee_student_USD = fee_student * exchange_rate_USD,
fee_postdoc_USD = fee_postdoc * exchange_rate_USD)Availability of fee_regular_USD, fee_student_USD, fee_postdoc_USD data:
Number of records with regular fee values in USD: 169 ( 100%).
Number of records with student fee values in USD: 141 ( 83.4%).
Number of records with postdoc fee values in USD: 44 ( 26%).
Summarise distributions of fee_regular_USD, fee_student_USD, fee_postdoc_USD:
#reshape data frame for all fees to be in the same column
dat_long <- dat %>%
select(society, fee_regular_USD, fee_student_USD, fee_postdoc_USD) %>%
pivot_longer(cols = c(fee_regular_USD, fee_student_USD, fee_postdoc_USD), names_to = "fee_type", values_to = "fee_value")
#calculate mean, mediam, min, max for dat_long by fee_type
dat_long %>%
group_by(fee_type) %>%
summarise(mean = mean(fee_value, na.rm = TRUE),
median = median(fee_value, na.rm = TRUE),
min = min(fee_value, na.rm = TRUE),
max = max(fee_value, na.rm = TRUE))## # A tibble: 3 × 5
## fee_type mean median min max
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 fee_postdoc_USD 48.0 49 0 119
## 2 fee_regular_USD 67.8 56 0.908 271.
## 3 fee_student_USD 27.4 25.3 0 120
Summarise values of fee_regular_USD, fee_student_USD, fee_postdoc_USD that are > 100 USD:
## fee_regular_USD
dat %>%
select(fee_regular_USD) %>%
filter(!is.na(fee_regular_USD)) %>%
count(fee_regular_USD > 100) %>%
mutate(percent = round(n/sum(n)*100, 1))## fee_regular_USD > 100 n percent
## 1 FALSE 136 80.5
## 2 TRUE 33 19.5
## fee_student_USD
dat %>%
select(fee_student_USD) %>%
filter(!is.na(fee_student_USD)) %>%
count(fee_student_USD > 100) %>%
mutate(percent = round(n/sum(n)*100, 1))## fee_student_USD > 100 n percent
## 1 FALSE 140 99.3
## 2 TRUE 1 0.7
## fee_postdoc_USD
dat %>%
select(fee_postdoc_USD) %>%
filter(!is.na(fee_postdoc_USD)) %>%
count(fee_postdoc_USD > 100) %>%
mutate(percent = round(n/sum(n)*100, 1))## fee_postdoc_USD > 100 n percent
## 1 FALSE 42 95.5
## 2 TRUE 2 4.5
#all
dat_long %>%
group_by(fee_type) %>%
filter(!is.na(fee_value)) %>%
count(fee_value > 100) -> fee_values_over100Summarise values of fee_regular_USD, fee_student_USD, fee_postdoc_USD that are > 50 USD:
## fee_regular_USD > 50
dat %>%
select(fee_regular_USD) %>%
filter(!is.na(fee_regular_USD)) %>%
count(fee_regular_USD > 50) %>%
mutate(percent = round(n/sum(n)*100, 1))## fee_regular_USD > 50 n percent
## 1 FALSE 73 43.2
## 2 TRUE 96 56.8
## fee_student_USD > 50
dat %>%
select(fee_student_USD) %>%
filter(!is.na(fee_student_USD)) %>%
count(fee_student_USD > 50) %>%
mutate(percent = round(n/sum(n)*100, 1))## fee_student_USD > 50 n percent
## 1 FALSE 132 93.6
## 2 TRUE 9 6.4
## fee_postdoc_USD > 50
dat %>%
select(fee_postdoc_USD) %>%
filter(!is.na(fee_postdoc_USD)) %>%
count(fee_postdoc_USD > 50) %>%
mutate(percent = round(n/sum(n)*100, 1))## fee_postdoc_USD > 50 n percent
## 1 FALSE 27 61.4
## 2 TRUE 17 38.6
#all
dat_long %>%
group_by(fee_type) %>%
filter(!is.na(fee_value)) %>%
count(fee_value > 50) -> fee_values_over50Summarise distribution of % discount for student fees relative to regular fee:
#reshape data frame for all fees to be in the same column
dat <- dat %>%
mutate(fee_student_pct = round((fee_student_USD / fee_regular_USD * 100),1)) %>%
mutate(fee_student_pct = replace(fee_student_pct, fee_student_pct == 0, NA)) %>% #remove where students pay 0
mutate(fee_student_pct = replace(fee_student_pct, fee_student_pct >= 100, NA)) #remove where students pay same
#sort(dat$fee_student_pct) #check
#calculate mean, mediam, min, max for fee_student_pct
dat %>%
filter(!is.na(fee_student_pct)) %>%
filter(fee_student_pct != 100) %>%
summarise(mean = mean(fee_student_pct, na.rm = TRUE),
median = median(fee_student_pct, na.rm = TRUE),
min = min(fee_student_pct, na.rm = TRUE),
max = max(fee_student_pct, na.rm = TRUE))## mean median min max
## 1 42.69493 44.4 10 90
Summarise distribution of % discount for postdoc fees relative to regular fee:
#reshape data frame for all fees to be in the same column
dat <- dat %>%
mutate(fee_postdoc_pct = round((fee_postdoc_USD / fee_regular_USD * 100),1)) %>%
mutate(fee_postdoc_pct = replace(fee_postdoc_pct, fee_postdoc_pct == 0, NA)) %>% #remove where students pay 0
mutate(fee_postdoc_pct = replace(fee_postdoc_pct, fee_postdoc_pct >= 100, NA)) #remove where students pay same
#sort(dat$fee_postdoc_pct) #check
#calculate mean, mediam, min, max for fee_postdoc_pct
dat %>%
filter(!is.na(fee_postdoc_pct)) %>%
summarise(mean = mean(fee_postdoc_pct, na.rm = TRUE),
median = median(fee_postdoc_pct, na.rm = TRUE),
min = min(fee_postdoc_pct, na.rm = TRUE),
max = max(fee_postdoc_pct, na.rm = TRUE))## mean median min max
## 1 52.90714 50 23.1 76.5
Plot fee values by fee type:
#wacolors$washington_pass #see colour palette
#boxplot with scatters
fig1A <- dat_long %>%
filter(!is.na(fee_value)) %>%
mutate(fee_type = fct_relevel(fee_type, "fee_regular_USD", "fee_postdoc_USD", "fee_student_USD")) %>% #reorder
ggplot(aes(x = fee_type, y = fee_value, fill = fee_type)) +
geom_boxplot() +
scale_fill_manual(values=c("#D9D1BE", "#31543B", "#94AA3D")) +
geom_boxplot(alpha = 0.5) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_y_continuous(labels = scales::dollar) +
scale_x_discrete(labels = c("regular", "postdoc", "student")) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Fee values by fee type")Distribution of individual membership fee amounts across three main fee categories of interest: regular, postdoc and student fees.
#plot fee_regular_USD vs fee_student_USD
fig1B <- dat %>%
ggplot(aes(x = fee_regular_USD, y = fee_student_USD)) + #show student fees against regular fees
geom_point(colour = "#94AA3D", alpha = 0.5) +
stat_smooth(aes(x = fee_regular_USD, y = fee_student_USD), method = "lm", se = FALSE, colour = "#94AA3D", na.rm = TRUE) + #add linear regression line
scale_x_continuous(labels = scales::dollar, limits = c(0, 210)) +
scale_y_continuous(labels = scales::dollar, limits = c(0, 130)) +
xlab('regular fee') +
ylab('discounted fee') +
geom_point(aes(x = fee_regular_USD, y = fee_postdoc_USD), colour = "#31543B", alpha = 0.5) + #show postdoc fees against regular fees
stat_smooth(aes(x = fee_regular_USD, y = fee_postdoc_USD), method = "lm", se = FALSE, colour = "#31543B", na.rm = TRUE) + #add linear regression line
geom_abline(intercept = 0, slope = 0.5, color = "#3E3C3A", linetype = "dashed", linewidth = 0.9) + #add 1:2 line
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "regular fee",
y = "discounted fee",
title = "Regular versus discounted membership fees")Regular versus discounted fees for students (lighter green) and postdocs (darker green). Dashed line represents 50% discount.
Figure 1 - combine 2 panels and save:
#assemble the panels using patchwork package
figure1 <- fig1A / fig1B +
plot_layout(ncol = 2, nrow = 1) +
plot_annotation(tag_levels = "A")
#ggsave(plot = figure1, here("plots", "Fig1AB.png"), width = 18, height = 7, units = "cm", dpi = "retina", scale = 1.2)
#ggsave(plot = figure1, here("plots", "Fig1AB.pdf"), width = 18, height = 7, units = "cm", scale = 1.2)Summarise distribution of the eligibility time frames for postdoc fee discounts:
# sum(!is.na(dat$postdoc_eligibiity_years)) #15 out of 44 (35%)
dat %>%
filter(!is.na(postdoc_eligibiity_years)) %>%
select(postdoc_eligibiity_years) %>%
summarise(mean = mean(postdoc_eligibiity_years),
median = median(postdoc_eligibiity_years),
min = min(postdoc_eligibiity_years),
max = max(postdoc_eligibiity_years))## mean median min max
## 1 4.866667 5 3 8
What is the geographical distribution of the locations (headquarters / registration / incorporation country and continent) of the international societies in ecology and evolution?
Base countries of societies - number of societies per society_country:
#table(dat$society_country, useNA = "always")
#table(is.na(dat$society_country)) #0 - count NA only
#show counts
# dat %>%
# count(society_country) %>%
# arrange(desc(n))
#plot as a simple barplot
dat %>%
#filter(!is.na(society_country)) %>%
count(society_country) %>%
ggplot(aes(x = reorder(society_country, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
coord_flip()Number of societies per country of origin.
Number of societies per currency:
#table(dat$fees_currency, useNA = "always")
#table(is.na(dat$fees_currency)) #4 missing
#show counts
# dat %>%
# count(fees_currency) %>%
# arrange(desc(n))
#plot as a simple barplot
dat %>%
filter(!is.na(fees_currency)) %>%
count(fees_currency) %>%
ggplot(aes(x = reorder(fees_currency, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
coord_flip()Number of societies per membership fee currency type.
Does geographical location of base country affect membership pricing?
#add new column to code with the continent the society is based in
dat$continent <- countrycode(sourcevar = dat[, "society_country"],
origin = "country.name",
destination = "continent")
#splt "Americas" into South and North America:
dat <- dat %>% mutate(continent = if_else(society_country %in% c("USA", "Canada", "Mexico"), "North America", continent))
dat <- dat %>% mutate(continent = if_else(society_country %in% c("Brazil", "Argentina"), "South America", continent))
table(dat$continent) #Mostly Europe and North America##
## Africa Asia Europe North America Oceania
## 6 14 55 85 4
## South America
## 5
#compare regular full membership fees by continent:
#summarise by continent
dat %>%
group_by(continent) %>%
summarise(mean = mean(fee_regular_USD, na.rm = TRUE),
SD = sd(fee_regular_USD, na.rm = TRUE),
median = median(fee_regular_USD, na.rm = TRUE),
min = min(fee_regular_USD, na.rm = TRUE),
max = max(fee_regular_USD, na.rm = TRUE))## # A tibble: 6 × 6
## continent mean SD median min max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Africa 23.7 14.1 20.9 10.4 50
## 2 Asia 31.8 22.0 23.2 0.908 66
## 3 Europe 57.1 40.4 51.9 4.76 271.
## 4 North America 86.0 48.4 80 10 201
## 5 Oceania 79.6 42.9 65.7 46.0 141.
## 6 South America 20.8 4.24 21.6 14.4 25
#boxplot with scatters:
fig2A <- dat %>%
filter(!is.na(fee_regular_USD)) %>%
mutate(continent = fct_relevel(continent, "Africa", "South America", "Asia", "Europe", "Oceania", "North America")) %>% #reorder
#mutate(fee_type = fct_relevel(fee_type, "fee_regular_USD", "fee_student_USD", "fee_postdoc_USD")) %>% #reorder
ggplot(aes(x = continent, y = fee_regular_USD, fill = continent)) +
geom_boxplot() +
scale_fill_manual(values = c("#FFFFE5", "#fff7BC", "#FEE391", "#FEC44F", "#FB9A29", "#EC7014")) +
geom_boxplot(alpha = 0.3) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_y_continuous(labels = scales::dollar) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Regular membership fee values by society base continent")Regular fees by continent.
Do societies from developed economies discount rates for members from
developing economies?
On the flip side, do societies from developing economies inflate rates
for members from other countries?
Process society_country into Global South and Global North categories:
GS_nations <- c("India", "China", "Indonesia", "Pakistan", "Nigeria", "Brazil", "Bangladesh", "Ethiopia", "Philippines", "Egypt", "DR Congo", "Vietnam", "Iran", "Thailand", "Tanzania", "South Africa", "Kenya", "Myanmar", "Colombia", "Uganda", "Sudan", "Argentina", "Algeria", "Iraq", "Afghanistan","Morocco", "Saudi Arabia", "Angola", "Yemen", "Peru", "Malaysia", "Ghana", "Mozambique", "Nepal", "Madagascar", "Ivory Coast", "Venezuela", "Cameroon", "Niger", "North Korea", "Mali", "Burkina Faso", "Syria", "Sri Lanka", "Malawi", "Zambia", "Chile", "Chad", "Ecuador", "Somalia", "Guatemala", "Senegal", "Cambodia", "Zimbabwe", "Guinea", "Rwanda", "Benin", "Burundi", "Tunisia", "Bolivia", "Haiti", "Jordan", "Dominican Republic", "Cuba", "South Sudan", "Honduras", "Papua New Guinea", "Tajikistan", "United Arab Emirates", "Togo", "Sierra Leone", "Laos", "Nicaragua", "Libya", "Paraguay", "Turkmenistan", "El Salvador", "Republic Of The Congo", "Singapore", "Central African Republic", "Liberia", "Palestine", "Lebanon", "Costa Rica", "Mauritania", "Oman", "Panama", "Kuwait", "Eritrea", "Mongolia", "Uruguay", "Bosnia And Herzegovina", "Jamaica", "Gambia", "Qatar", "Botswana", "Namibia", "Gabon", "Lesotho", "Guinea Bissau", "Equatorial Guinea", "Trinidad And Tobago", "Bahrain", "Timor Leste", "Mauritius", "Eswatini", "Djibouti", "Fiji", "Comoros", "Guyana", "Bhutan", "Solomon Islands", "Suriname", "Cape Verde", "Maldives", "Brunei", "Bahamas", "Belize", "Vanuatu", "Barbados", "Sao Tome And Principe", "Samoa", "Saint Lucia", "Kiribati", "Grenada", "Micronesia", "Tonga", "Seychelles", "Saint Vincent And The Grenadines", "Antigua And Barbuda", "Dominica", "Saint Kitts And Nevis", "Marshall Islands", "Nauru")
#count Global South societies per country in the data set:
dat %>%
filter(!is.na(society_country)) %>%
filter(society_country %in% GS_nations) %>%
count(society_country) %>%
arrange(desc(n)) #%>% View() #table of GS nations ## society_country n
## 1 India 6
## 2 Argentina 4
## 3 Kenya 3
## 4 South Africa 3
## 5 Brazil 1
## 6 Philippines 1
#check overlap with GS_nations
intersect(unique(dat$society_country), GS_nations) #all 6 detected: "Kenya" "Argentina" "Philippines" "India" "South Africa" "Brazil" ## [1] "Kenya" "Argentina" "Philippines" "India" "South Africa"
## [6] "Brazil"
## [1] "USA" "UK" "Taiwan" "Spain"
## [5] "Mexico" "Australia" "Italy" "Czech Republic"
## [9] "Netherlands" "Germany" "France" "Hungary"
## [13] "Finland" "Belgium" "Switzerland" "Japan"
## [17] "Korea" "New Zealand" "Norway" "Sweden"
## [21] "Poland" "Portugal"
#add new column to code if society is based in Global South country or not
dat <- dat %>% mutate(GS_country = ifelse(society_country %in% GS_nations, "Global South", "Global North"))
table(dat$GS_country) #only 18 societies from the Global South (6 countries listed above)##
## Global North Global South
## 151 18
#compare full membership fees by GS_nations:
#boxplot with scatters
fig2B <- dat %>%
filter(!is.na(fee_regular_USD)) %>%
mutate(GS_country = fct_relevel(GS_country, "Global South", "Global North")) %>% #reorder
ggplot(aes(x = GS_country, y = fee_regular_USD, fill = GS_country)) +
geom_boxplot() +
scale_fill_manual(values = c("#8A6172", "#904459")) +
#scale_fill_manual(values = c("#888888", "#fff7BC")) +
geom_boxplot(alpha = 0.3) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_y_continuous(labels = scales::dollar) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Regular membership fee values by base country type")Regular fees for Global North and Global South base countries.
Compare regular fee amounts by base country type:
#not assuming equal variance using library(rstatix)
stat.test <- dat %>%
t_test(fee_regular_USD ~ GS_country) %>%
add_significance()
stat.test #SIGNIF## # A tibble: 1 × 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 fee_regular_USD Global N… Globa… 151 18 10.8 84.9 1.07e-17 ****
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 fee_regular_USD Global North Global South 1.55 151 18 large
Are discounted/waived fees available for individual members from some countries or regions?
## countries_payless n
## 1 no 97
## 2 yes 72
#table(dat$countries_payless, useNA = "always") #72 yes = 43%
#plot as a simple barplot
dat %>%
#filter(!is.na(countries_payless)) %>%
count(countries_payless) %>%
ggplot(aes(x = reorder(countries_payless, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
coord_flip() Availability of country-level membership fee discounts.
How are eligible countries or regions defined for fee country-levle fee discounts?
Text mining of country-level discounts descriptions:
#create list of specific words (stemmed) to count within strings
specific.words <- c("develop", "low", "middle", "world bank", "worldbank", "south")
#prepare countries_payless descriptions as a single lowercase string
descriptions <- dat %>%
filter(!is.na(countries_payless)) %>%
select(countries_payless_which) %>%
tolower() #single lowercase string
#sum of all mentions for each word
specific.words.mentions <- specific.words %>%
map_int(~ str_count(tolower(descriptions), .x))
#prepare countries_payless descriptions while keeping them separate for each society
descriptions2 <- tolower(dat$countries_payless_which) #vector of lowercase strings
#sum of mentions per society for each word (counts only one mention per society)
specific.words.mentions2 <- specific.words %>%
map_int(~ sum(str_detect(descriptions2, .x), na.rm = TRUE))
words.df <- tibble(Words = specific.words,
Count_all = specific.words.mentions,
Count_once = specific.words.mentions2)
#plot frequencies of specific words - first mentions in countries_payless_which descriptions.
words.df %>%
ggplot(aes(x = reorder(Words, Count_all),
y = Count_once)) +
geom_col(width = 0.8,
fill = "#2D3F4A",
alpha = 0.7) +
coord_flip() +
scale_y_continuous(breaks = c(0, 10, 20, 30, 40, 50),
limits = c(0, 50)) +
theme_bw() +
labs(x = "Word stem",
y = "Count of first mention per society") +
#scale_x_discrete(labels = NULL) +
#labs(x = "") + #used to remove vertical labels, also breaks = NULL
theme(legend.position = "none", axis.title.x = element_text(size = 10))Numbers of first mentions of selected stemmed words in descriptions of country-level membership fee discounts.
Intersection of base country type and country-level fee discounts:
#wacolors$palouse #see colour palette
#table(dat$countries_payless, dat$GS_country)
## Mosaic plots using ggmosaic:
# fig2C <- ggplot(data = dat) +
# geom_mosaic(aes(x = product(countries_payless, GS_country), fill = countries_payless)) +
# theme(plot.title = element_text(hjust = 0.5)) +
# scale_fill_manual(values = c("#DDAA33","#DDCC77")) +
# geom_mosaic_text(aes(x = product(countries_payless, GS_country), label = after_stat(.wt)), as.label = TRUE) +
# theme_minimal() +
# theme(legend.position = "none") +
# labs(x = "society based in", y = "country-levele member discounts", title = "Base country type and country-level member discounts")
# fig2C <- ggplot(data = dat) +
# geom_mosaic(aes(x = product(GS_country, countries_payless), fill = GS_country)) +
# theme(plot.title = element_text(hjust = 0.5)) +
# scale_fill_manual(values = c("#904459", "#DDAA33")) + #,"#904459", "#BB5566"
# geom_mosaic_text(aes(x = product(GS_country, countries_payless), label = after_stat(.wt)), as.label = TRUE) +
# theme_minimal() +
# theme(legend.position = "none") +
# labs(x = "country-level member discounts", y = "society based in", title = "Base country type and country-level member discounts")
## Stacked bar plot:
fig2C <- ggplot(data = dat, aes(x = GS_country, fill = countries_payless)) +
scale_fill_manual("Country-level member fee discounts: ", breaks = c("yes", "no"),
guide = "legend", values = c("#C0A43D", "#CCBA98")) +
geom_bar(width = 0.6) +
geom_text(stat = "count", aes(label = after_stat(count)),
size = 4, color = "white", hjust = 1.5, vjust = 0.5, position = "stack") +
theme_bw() +
theme(legend.position = "top")+
labs(x = "society based in", y = "count of societies") +
lims(y = c(0, 160)) +
coord_flip()Regular fee country-level discounts by base country type.
Test for country-level fee discounts and base country type:
#make contingency table of GS_nations versus country-level discount availability (countries_payless):
conttable <- table(dat$GS_country, dat$countries_payless) #6/18 (33%) GS have fee discounts for GS, 66/151 (44%) GN have fee discounts for GS
#rownames(conttable) <- unique(dat$GS_country)
#colnames(conttable) <- c("no discounts", "discounts")
fisher.test(conttable) #NS##
## Fisher's Exact Test for Count Data
##
## data: conttable
## p-value = 0.4583
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1884282 1.9775668
## sample estimates:
## odds ratio
## 0.6455665
Are increased fees available for individual members from some countries or regions?
## countries_paymore n
## 1 no 146
## 2 yes 23
#table(dat$countries_paymore, useNA = "always") #23 yes = 13%
#plot as a simple barplot
dat %>%
#filter(!is.na(countries_paymore)) %>%
count(countries_paymore) %>%
ggplot(aes(x = reorder(countries_paymore, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
coord_flip() Availability of country-level membership fee increases.
How are eligible countries or regions defined for fee country-level fee increases?
Text mining of country-level increases descriptions:
#create list of specific words (stemmed) to count within strings
specific.words <- c("develop", "high", "foreign", "outside", "overseas", "north")
#prepare countries_paymore descriptions as a single lowercase string
descriptions <- dat %>%
filter(!is.na(countries_paymore)) %>%
select(countries_paymore_which) %>%
tolower() #single lowercase string
#sum of all mentions for each word
specific.words.mentions <- specific.words %>%
map_int(~ str_count(tolower(descriptions), .x))
#prepare countries_paymore descriptions while keeping them separate for each society
descriptions2 <- tolower(dat$countries_paymore_which) #vector of lowercase strings
#sum of mentions per society for each word (counts only one mention per society)
specific.words.mentions2 <- specific.words %>%
map_int(~ sum(str_detect(descriptions2, .x), na.rm = TRUE))
words.df <- tibble(Words = specific.words,
Count_all = specific.words.mentions,
Count_once = specific.words.mentions2)
#plot frequencies of specific words - first mentions in countries_paymore_which descriptions.
words.df %>%
ggplot(aes(x = reorder(Words, Count_all),
y = Count_once)) +
geom_col(width = 0.8,
fill = "#2D3F4A",
alpha = 0.7) +
coord_flip() +
scale_y_continuous(breaks = c(0, 10, 20),
limits = c(0, 20)) +
theme_bw() +
labs(x = "Word stem",
y = "Count of first mention per society") +
#scale_x_discrete(labels = NULL) +
#labs(x = "") + #used to remove vertical labels, also breaks = NULL
theme(legend.position = "none", axis.title.x = element_text(size = 10))Numbers of first mentions of selected stemmed words in descriptions of country-level membership fee increases.
Intersection of base country type and country-level fee increases:
#wacolors$palouse #see colour palette
#table(dat$countries_paymore, dat$GS_country)
## mosaic plots using ggmosaic:
# fig2D <- ggplot(data = dat) +
# geom_mosaic(aes(x = product(countries_paymore, GS_country), fill = countries_paymore)) +
# theme(plot.title = element_text(hjust = 0.5)) +
# scale_fill_manual(values = c("#904459", "#BB5566")) +
# geom_mosaic_text(aes(x = product(countries_paymore, GS_country), label = after_stat(.wt)), as.label = TRUE) +
# theme_minimal() +
# theme(legend.position = "none") +
# labs(x="society based in", y = "country-levele member increases", title = "Base country type and country-level member increases")
# fig2D <- ggplot(data = dat) +
# geom_mosaic(aes(x = product(GS_country, countries_paymore), fill = GS_country)) +
# theme(plot.title = element_text(hjust = 0.5)) +
# scale_fill_manual(values = c("#904459", "#DDAA33")) + #,"#904459", "#BB5566"
# geom_mosaic_text(aes(x = product(GS_country, countries_paymore), label = after_stat(.wt)), as.label = TRUE) +
# theme_minimal() +
# theme(legend.position = "none") +
# labs(x = "country-level member fee increases", y = "society based in", title = "Base country type and country-level member fee increases")
## Stacked bar plot:
fig2D <- ggplot(data = dat, aes(x = GS_country, fill = countries_paymore)) +
scale_fill_manual("Country-level member fee increases: ", breaks = c("yes", "no"),
guide = "legend", values = c("#C0A43D", "#CCBA98")) +
geom_bar(width = 0.6) +
geom_text(stat = "count", aes(label = after_stat(count)),
size = 4, color = "white", hjust = 1.5, vjust = 0.5, position = "stack") +
theme_bw() +
theme(legend.position = "top")+
labs(x = "society based in", y = "count of societies") +
lims(y = c(0, 160)) +
coord_flip()Regular fee country-level increases by base country type.
Figure 2 - combine 4 panels and save:
#assemble the panels using patchwork package
figure2 <- fig2A / fig2B / fig2C / fig2D +
plot_layout(ncol = 2, nrow = 2) +
plot_annotation(tag_levels = "A")
#ggsave(plot = figure2, here("plots", "Fig2ABCD.png"), width = 18, height = 12, units = "cm", dpi = "retina", scale = 1.7)
#ggsave(plot = figure2, here("plots", "Fig2ABCD.pdf"), width = 18, height = 12, units = "cm", scale = 1.7)Test for country-level fee increases and base country type:
#make contingency table of GS_nations versus country-level discount availability (countries_paymore):
conttable <- table(dat$GS_country, dat$countries_paymore) #6/18 (33%) GS have fee increases for GS, 66/151 (44%) GN have fee increases for GS
#rownames(conttable) <- unique(dat$GS_country)
#colnames(conttable) <- c("no increases", "increases")
fisher.test(conttable) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: conttable
## p-value = 7.831e-10
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 9.490279 151.962153
## sample estimates:
## odds ratio
## 34.82255
National versus International societies:
#table(dat$society_type) #needs cleaning - aggregate into national by name vs. international (not national by name)
dat$society_NI <- substr(dat$society_type, 1, 1) #Extract first letter
# table(dat$society_NI) #52 (31%) national by name
table(dat$continent, dat$society_NI) #North America and Europe more likely to be base of international societies##
## I N
## Africa 2 4
## Asia 6 8
## Europe 41 14
## North America 65 20
## Oceania 2 2
## South America 1 4
##
## Fisher's Exact Test for Count Data
##
## data: table(dat$continent, dat$society_NI)
## p-value = 0.002746
## alternative hypothesis: two.sided
table(dat$GS_country, dat$society_NI) #Global South countries are more likely to be base of national societies##
## I N
## Global North 111 40
## Global South 6 12
fisher.test(table(dat$GS_country, dat$society_NI)) #p-value = 0.0009914, odds ratio: 5.483734, 95 percent confidence interval: 1.765347 19.059671##
## Fisher's Exact Test for Count Data
##
## data: table(dat$GS_country, dat$society_NI)
## p-value = 0.0009914
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.765347 19.059671
## sample estimates:
## odds ratio
## 5.483734
Are discounted individual membership fees currently available for the following groups: students, postdocs, retired/emeritus, unemployed, employed part-time, junior (pre-university), family, educators/outreach/communication non-academic specialists, general community/public, or any other groups?
Count types of discounts (discount_types need to be splitted):
#replace "no fees" with "free option" to avoid confusion with "no fees" in dat$discount_types using gsub
dat$discount_types <- gsub("no fees", "free option", dat$discount_types)
#count how many have NA = no discounts at all (flat fees)
table(is.na(dat$discount_types)) #15##
## FALSE TRUE
## 154 15
#replace NA with "no discounts" to avoid confusion with "no fees"
dat <- dat %>% mutate(discount_types = ifelse(is.na(discount_types), "no discounts", discount_types))
#create new variable with discount types per society as a list
dat <- dat %>% mutate(discount_types_list = str_split(dat$discount_types, pattern = ", "))
#create new variable with counts of discount types per society
dat <- dat %>% mutate(discount_types_count = lengths(discount_types_list))
#simple table of number of discount types per society
table(dat$discount_types_count) #60 (35%) have only one type of discount##
## 1 2 3 4 5 6 7
## 60 43 33 23 6 3 1
#count each discount by type and show percentage of societies
dat %>%
unnest(discount_types_list) %>%
count(discount_types_list) %>%
filter(discount_types_list!= "") %>%
arrange(desc(n)) %>%
mutate(percentage = n/169 * 100) -> discount_types_table
discount_types_table## # A tibble: 14 × 3
## discount_types_list n percentage
## <chr> <int> <dbl>
## 1 student 138 81.7
## 2 retired/emeritus 64 37.9
## 3 postdoc / ECR (excluding students) 41 24.3
## 4 family 34 20.1
## 5 other 32 18.9
## 6 non-academic specialists 18 10.7
## 7 junior 17 10.1
## 8 no discounts 15 8.88
## 9 unemployed 10 5.92
## 10 free option 7 4.14
## 11 general community/public 5 2.96
## 12 employed part-time 4 2.37
## 13 fees proportional to income brackets 4 2.37
## 14 discretionary fee amount 3 1.78
#make a simple bar plot
discount_types_table %>%
filter(discount_types_list!= "") %>%
ggplot(aes(x = reorder(discount_types_list, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
lims(y = c(0, 150)) +
coord_flip()#dat$discount_types_list <- str_split(dat$discount_types, pattern = ", ") #same as above but modifies original data frameFrequencies of individual-level membership fee discount types.
Make upset plot to show frequencies of combinations of individual discount types:
#make upset plot using library(ggupset)
figure3 <- dat %>%
filter(discount_types_list != "") %>%
mutate(discount_types_list = ifelse(discount_types_list == "no fees", "free option", discount_types_list)) %>%
ggplot(aes(x = discount_types_list)) +
geom_bar(fill = "#21281D",width = 0.8) +
scale_y_continuous(limits = c(0, 40)) +
scale_x_upset(order_by = c("freq")) +
theme_combmatrix(combmatrix.panel.striped_background = FALSE,
combmatrix.panel.point.color.fill = "#21281D",
combmatrix.panel.line.size = 0,
plot.title = element_text(family = "sans", size = 16, face = "plain", color = "#21281D"),
axis.title.x = element_text(family="sans", size = 12, color = "#21281D", face="plain"),
axis.title.y = element_text(family="sans", size = 12, color = "#21281D", face = "plain", vjust = -2),
) +
theme(plot.title = element_text(hjust = 0.5)) +
labs(title = "Combinations of main types of fee discounts",
x = "",
y = "count of societies")
#ggsave(plot = figure3, here("plots", "Fig3.png"), width = 18, height = 8, units = "cm", dpi = "retina", scale = 1.5)
#ggsave(plot = figure3, here("plots", "Fig3.pdf"), width = 18, height = 8, units = "cm", scale = 1.5)Combinations of the main types of individual-level discounts.
How many individual-level discount categories per society?
##
## 1 2 3 4 5 6 7
## 60 43 33 23 6 3 1
#table with percentages
round(table(dat$discount_types_count)/length(dat$discount_types_count)*100,1)##
## 1 2 3 4 5 6 7
## 35.5 25.4 19.5 13.6 3.6 1.8 0.6
#plot number of discountcategories per society
dat %>%
#mutate(text = fct_reorder(text, value)) %>%
ggplot(aes(x = discount_types_count)) +
geom_histogram(alpha = 0.8, binwidth = 1, fill="black", col="grey") +
scale_x_continuous(breaks = seq(1,7, by = 1)) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(
legend.position = "none",
panel.spacing = unit(0.1, "lines"),
strip.text.x = element_text(size = 8)
) +
labs(title = "Count of discount types per society",
x = "count of discounts",
y = "count of societies") Frequencies of number of discount types per society.
Are complete or partial individual membership fee waivers currently available on individual request?
#table(dat$individual_waivers, useNA = "always") #25 yes = 30%
#plot as a simple barplot
dat %>%
#filter(!is.na(individual_waivers)) %>%
count(individual_waivers) %>%
ggplot(aes(x = reorder(individual_waivers, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
coord_flip()Do societies with commitment to EDI (Equity, Diversity, Inclusion) stated on the website or policy documents (or having dedicated EDI structures) offer a more inclusive individual membership fee structure (e.g., more options for discounts)?
Summarise data on EDI statements and structures:
#Presence of EDI statement as a simple barplot:
# dat %>%
# count(society_EDI_statement) %>%
# ggplot(aes(x = reorder(society_EDI_statement, n), y = n)) +
# geom_bar(stat = "identity", position = position_dodge(0.9)) +
# coord_flip()
#
# #Presence of EDI structures as a simple barplot:
# dat %>%
# count(society_EDI_structure) %>%
# ggplot(aes(x = reorder(society_EDI_structure, n), y = n)) +
# geom_bar(stat = "identity", position = position_dodge(0.9)) +
# coord_flip()
#Presence of a EDI statement as % of all societies
table(dat$society_EDI_statement, useNA = "always")/nrow(dat)*100 # 46.7% have EDI statement##
## no yes <NA>
## 53.25444 46.74556 0.00000
#Presence of a EDI structure as % of all societies
table(dat$society_EDI_structure, useNA = "always")/nrow(dat)*100 # 36.7% have EDI structure##
## no yes <NA>
## 63.31361 36.68639 0.00000
#A simple table of overlap between presence of EDI statement and structures
#table(dat$society_EDI_statement, dat$society_EDI_structure)
#table(dat$society_EDI_statement, dat$countries_payless) #more likely with EDI
ft <- fisher.test(table(dat$society_EDI_statement, dat$society_EDI_structure))
#Plot if societies with EDI statement also have dedicated EDI structures
#mosaic plot using ggmosaic
ggplot(data = dat) +
geom_mosaic(aes(x = product(society_EDI_structure, society_EDI_statement), fill = society_EDI_structure)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(society_EDI_structure, society_EDI_statement), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) + labs(x = "society has an EDI structure", y = "society has an EDI statement", title = "Society EDI statements and structures")#test if societies with EDI statement also have dedicated EDI structures
fisher.test(table(dat$society_EDI_statement, dat$society_EDI_structure)) #odds ratio 132.9641, CI: 30.87566 1198.51758, p-value < 2.2e-16##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_statement, dat$society_EDI_structure)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 30.87566 1198.51758
## sample estimates:
## odds ratio
## 132.9641
Overlap between presence of EDI statement and structures in societies.
EDI structure vs. base country type:
#Plot societies with EDI structure vs. base country type
#table(dat$society_EDI_structure, dat$countries_payless) #more likely with EDI
ft <- fisher.test(table(dat$society_EDI_structure, dat$GS_country))
#mosaic plot using ggmosaic
ggplot(data = dat) +
geom_mosaic(aes(x = product(GS_country, society_EDI_structure), fill = GS_country)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(GS_country, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) + labs(x = "society has an EDI structure", y = "society base country", title = "Society EDI commitment and base country type")#test if societies with EDI structure are more likely to be based in Global North countries
fisher.test(table(dat$society_EDI_structure, dat$GS_country)) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_structure, dat$GS_country)
## p-value = 0.0001762
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.0000000 0.3443774
## sample estimates:
## odds ratio
## 0
#test if societies with EDI statement are more likely to be based in Global North countries
fisher.test(table(dat$society_EDI_statement, dat$GS_country)) #SIGNIFICANT - same pattern as for EDI structure##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_statement, dat$GS_country)
## p-value = 0.001897
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.01311125 0.54430664
## sample estimates:
## odds ratio
## 0.1213495
Overlap between Presence of EDI structures and structures in societies.
NOTE: No change in the results below if only done on the Global North countries.
Presence of EDI structures vs number of discount types per society:
#test if distributions of counts are the same:
table(dat$discount_types_count, dat$society_EDI_structure) #distributions of counts for societies with and without EDI structure##
## no yes
## 1 51 9
## 2 29 14
## 3 16 17
## 4 8 15
## 5 2 4
## 6 0 3
## 7 1 0
Adjust labels for plotting society_EDI_structure:
#change labels to use for plot facets
dat$society_EDI_structure2 <- factor(dat$society_EDI_structure, levels = c("no", "yes"),
labels = c("no EDI structure", "has EDI structure"))Presence of EDI structures vs presence of country-level fee discounts:
#table(dat$society_EDI_structure, dat$countries_payless) #more likely with EDI
ft <- fisher.test(table(dat$society_EDI_structure, dat$countries_payless))
#mosaic plot using ggmosaic
ggplot(data = dat) +
geom_mosaic(aes(x = product(countries_payless, society_EDI_structure), fill = countries_payless)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(countries_payless, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has country-level discounts", title = "Society EDI commitment and country-level discounts")#test if societies with EDI structure also have country-level fee discounts
fisher.test(table(dat$society_EDI_structure, dat$countries_payless)) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_structure, dat$countries_payless)
## p-value = 4.562e-06
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 2.323953 9.923784
## sample estimates:
## odds ratio
## 4.737207
#test if societies with EDI statement also have country-level fee discounts
fisher.test(table(dat$society_EDI_statement, dat$countries_payless)) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_statement, dat$countries_payless)
## p-value = 2.31e-06
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 2.351051 9.687224
## sample estimates:
## odds ratio
## 4.709654
Overlap between Presence of EDI structures and country-level fee discounts.
Presence of EDI structures vs presence of country-level fee increases:
#table(dat$society_EDI_structure, dat$countries_paymore) #more likely without EDI
ft <- fisher.test(table(dat$society_EDI_structure, dat$countries_paymore))
#mosaic plot using ggmosaic
ggplot(data = dat) +
geom_mosaic(aes(x = product(countries_paymore, society_EDI_structure), fill = countries_paymore)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(countries_paymore, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has country-level fee increases", title = "Society EDI commitment and country-level fee increases")#test if societies with EDI structure also have fee waivers on individual request
fisher.test(table(dat$society_EDI_structure, dat$countries_paymore)) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_structure, dat$countries_paymore)
## p-value = 0.002132
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.01508173 0.59813571
## sample estimates:
## odds ratio
## 0.1377006
#test if societies with EDI statement also have fee waivers on individual request
fisher.test(table(dat$society_EDI_statement, dat$countries_paymore)) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_statement, dat$countries_paymore)
## p-value = 0.01253
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.07493325 0.81346626
## sample estimates:
## odds ratio
## 0.2722107
Overlap between Presence of EDI structures and country-level fee increases.
Presence of EDI structures vs fee waivers on individual request:
#table(dat$society_EDI_structure, dat$individual_waivers)
ft <- fisher.test(table(dat$society_EDI_structure, dat$individual_waivers))
#mosaic plot using ggmosaic
ggplot(data = dat) +
geom_mosaic(aes(x = product(individual_waivers, society_EDI_structure), fill = individual_waivers)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(individual_waivers, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has fee waivers on request", title = "Society EDI commitment and fee waivers on request")#test if societies with EDI structure also have fee waivers on individual request
fisher.test(table(dat$society_EDI_structure, dat$individual_waivers)) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_structure, dat$individual_waivers)
## p-value = 0.00319
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.437306 10.420808
## sample estimates:
## odds ratio
## 3.754839
#test if societies with EDI statement also have fee waivers on individual request
fisher.test(table(dat$society_EDI_statement, dat$individual_waivers)) #SIGNIFICANT##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_statement, dat$individual_waivers)
## p-value = 0.001999
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.569198 14.273453
## sample estimates:
## odds ratio
## 4.395034
Overlap between Presence of EDI structures and fee waivers on individual request.
Prepare data to test tests for other fee categories and EDI structure:
dat3 <- dat
#dat3 %>% unnest(discount_types_list, keep_empty = TRUE) %>% View() #via unnesting
discount.ids <- unique(unlist(dat3$discount_types_list))
discount.matrix <- t(sapply(dat3$discount_types_list, function(x) table(factor(x, levels = discount.ids))))
#str(discount.matrix)
rownames(discount.matrix) <- dat3$society
dat3 <- cbind(dat3, as_tibble(discount.matrix))
#names(dat3)Presence of EDI structures vs presence of student fee discounts:
dat3$student <- factor(dat3$"student")
levels(dat3$student) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$student)) #SIGNIFICANT - more student discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(student, society_EDI_structure), fill = student)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(student, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has student fee discount", title = "Society EDI commitment and student fee discount")Overlap between Presence of EDI structures and student fee discounts.
Presence of EDI structures vs presence of retired/emeritus fee discounts:
dat3$retired <- factor(dat3$"retired/emeritus")
levels(dat3$retired) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$retired)) #SIGNIFICANT - more retired/emeritus discounts
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(retired, society_EDI_structure), fill = retired)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(student, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has retired/emeritus fee discount", title = "Society EDI commitment and retired/emeritus fee discount")Overlap between Presence of EDI structures and retired/emeritus fee discounts.
Presence of EDI structures vs presence of postdoc fee discounts:
dat3$postdoc <- factor(dat3$"postdoc / ECR (excluding students)")
levels(dat3$postdoc) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$postdoc)) #SIGNIFICANT - more postdoc discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(postdoc, society_EDI_structure), fill = postdoc)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(postdoc, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has postdoc fee discount", title = "Society EDI commitment and postdoc fee discount")Overlap between Presence of EDI structures and postdoc fee discounts.
Presence of EDI structures vs presence of non-academic specialists fee discounts:
dat3$specialist <- factor(dat3$"non-academic specialists")
levels(dat3$specialist) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$specialist)) #SIGNIFICANT - more specialist discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(specialist, society_EDI_structure), fill = specialist)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(specialist, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has specialist fee discount", title = "Society EDI commitment and non-academic specialist fee discount")Overlap between Presence of EDI structures and non-academic fee discounts.
Presence of EDI structures vs presence of community fee discounts:
dat3$community <- factor(dat3$"general community/public")
levels(dat3$community) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$community)) #NS - no association
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(community, society_EDI_structure), fill = community)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(community, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has community/public fee discount", title = "Society EDI commitment and community/public fee discount")Overlap between Presence of EDI structures and general community/public fee discounts.
Presence of EDI structures vs presence of family fee discounts:
dat3$family <- factor(dat3$"family")
levels(dat3$family) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$family)) #NS - no more family discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(family, society_EDI_structure), fill = family)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(family, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has family fee discount", title = "Society EDI commitment and family fee discount")Overlap between Presence of EDI structures and family fee discounts.
Presence of EDI structures vs presence of junior fee discounts:
dat3$junior <- factor(dat3$"junior")
levels(dat3$junior) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$junior)) #NS - no more junior discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(junior, society_EDI_structure), fill = junior)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(junior, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has junior fee discount", title = "Society EDI commitment and junior fee discount")Overlap between Presence of EDI structures and junior fee discounts.
Presence of EDI structures vs presence of discretionary fee discounts:
dat3$discretionary <- factor(dat3$"discretionary fee amount")
levels(dat3$discretionary) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$discretionary)) #NS - no more discretionary discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(discretionary, society_EDI_structure), fill = discretionary)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(discretionary, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has discretionary fee discount", title = "Society EDI commitment and discretionary fee discount")Overlap between Presence of EDI structures and discretionary fee discounts.
Presence of EDI structures vs presence of a “free option” fee discounts:
dat3$free_option <- factor(dat3$"free option")
levels(dat3$free_option) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$free_option)) #NS - no more free_option discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(free_option, society_EDI_structure), fill = free_option)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(free_option, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has a free fee option", title = "Society EDI commitment and free fee option")Overlap between Presence of EDI structures and free option fee discounts.
Presence of EDI structures vs presence of fees proportional to income brackets (sliding scale):
dat3$slidingscale <- factor(dat3$"fees proportional to income brackets")
levels(dat3$slidingscale) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$slidingscale)) #NS - no more sliding scale discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(slidingscale, society_EDI_structure), fill = slidingscale)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(slidingscale, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has sliding scale fee discount", title = "Society EDI commitment and sliding scale fee discount")Overlap between Presence of EDI structures and sliding scale fees.
Presence of EDI structures vs presence of unemployed fee discounts:
dat3$unemployed <- factor(dat3$"unemployed")
levels(dat3$unemployed) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$unemployed)) #NS - no more unemployed discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(unemployed, society_EDI_structure), fill = unemployed)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(unemployed, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has unemployed fee discount", title = "Society EDI commitment and unemployed fee discount")Overlap between Presence of EDI structures and unemployed fee discounts.
Presence of EDI structures vs presence of part-time employment fee discounts:
dat3$part_time <- factor(dat3$"employed part-time")
levels(dat3$part_time) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$part_time)) #NS - no more part-time employment discounts with EDI
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(part_time, society_EDI_structure), fill = part_time)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(part_time, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has part-time employment fee discount", title = "Society EDI commitment and part-time employment fee discount")Overlap between Presence of EDI structures and part-time employment fee discounts.
Presence of EDI structures vs presence of other fee discounts:
dat3$other <- factor(dat3$"other")
levels(dat3$other) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$other)) #NS - no difference
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(other, society_EDI_structure), fill = other)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(other, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has other fee discounts", title = "Society EDI commitment and other fee discounts")Overlap between Presence of EDI structures and presence of other fee discounts.
Presence of EDI structures vs presence of fess without any discounts:
dat3$no_discounts <- factor(dat3$"no discounts")
levels(dat3$no_discounts) <- list("no" = "0", "yes" = "1")
ft <- fisher.test(table(dat3$society_EDI_structure, dat3$no_discounts)) #SIGNIFICANT - less likely to have no discounts
#mosaic plot using ggmosaic
ggplot(data = dat3) +
geom_mosaic(aes(x = product(no_discounts, society_EDI_structure), fill = no_discounts)) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = c("#9970AB", "#E7D4E8")) +
geom_mosaic_text(aes(x = product(no_discounts, society_EDI_structure), label = after_stat(.wt)), as.label = TRUE) +
theme_minimal() +
theme(legend.position = "none") +
annotate("point", x = 0.5, y = 0.5, size = 40, color = "#AA3377") +
annotate("text", label = paste("OR=", round(ft$estimate,2),"\n", "CI=", round(ft$conf.int[1],2), "-", round(ft$conf.int[2],2), "\n p=", round(ft$p.value,3), sep = ""), x = 0.5, y = 0.5, size = 4, colour = "white", fontface = "italic" ) +
labs(x = "society has an EDI structure", y = "society has no fee discounts ", title = "Society EDI commitment and no fee discounts")Overlap between Presence of EDI structures and lack of any fee discounts.
Presence of EDI structures vs regular fee amounts:
#compare full membership fees by EDI structure:
#boxplot with scatters
dat %>%
filter(!is.na(fee_regular_USD)) %>%
mutate(society_EDI_structure = fct_relevel(society_EDI_structure, "yes", "no")) %>% #reorder
ggplot(aes(x = society_EDI_structure, y = fee_regular_USD, fill = society_EDI_structure)) +
geom_boxplot() +
scale_fill_manual(values = c("#fff7BC", "#888888")) +
geom_boxplot(alpha = 0.3) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_y_continuous(labels = scales::dollar) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Regular membership fee values by society EDI structure")Regular fees for societies with and without EDI structures.
Compare regular fee amounts by presence of EDI structures:
#not assuming equal variance using library(rstatix)
stat.test <- dat %>%
# filter(GS_country == "Global North") %>%
t_test(fee_regular_USD ~ society_EDI_structure) %>%
add_significance()
stat.test #SIGNIF## # A tibble: 1 × 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 fee_regular_USD no yes 107 62 -5.87 124. 3.75e-8 ****
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 fee_regular_USD no yes -0.941 107 62 large
Presence of EDI structures vs student fee amount:
#compare full membership fees by EDI structure:
#boxplot with scatters
dat %>%
filter(!is.na(fee_student_USD)) %>%
mutate(society_EDI_structure = fct_relevel(society_EDI_structure, "yes", "no")) %>% #reorder
ggplot(aes(x = society_EDI_structure, y = fee_student_USD, fill = GS_country)) +
geom_boxplot() +
#scale_fill_manual(values = c("#fff7BC", "#888888")) +
geom_boxplot(alpha = 0.3) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_y_continuous(labels = scales::dollar) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Student membership fee values by society EDI structure")Student fees for societies with and without EDI structure.
Compare student fee amounts by presence of EDI structure:
#not assuming equal variance using library(rstatix)
stat.test <- dat %>%
# filter(GS_country == "Global North") %>%
t_test(fee_student_USD ~ society_EDI_structure) %>%
add_significance()
stat.test #SIGNIF## # A tibble: 1 × 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 fee_student_USD no yes 80 61 -4.84 104. 0.00000456 ****
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 fee_student_USD no yes -0.840 80 61 large
Presence of EDI structures vs postdoc fee amount:
#compare full membership fees by EDI structure:
#boxplot with scatters
dat %>%
filter(!is.na(fee_postdoc_USD)) %>%
mutate(society_EDI_structure = fct_relevel(society_EDI_structure, "yes", "no")) %>% #reorder
ggplot(aes(x = society_EDI_structure, y = fee_postdoc_USD, fill = society_EDI_structure)) +
geom_boxplot() +
scale_fill_manual(values = c("#fff7BC", "#888888")) +
geom_boxplot(alpha = 0.3) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_y_continuous(labels = scales::dollar) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Postdoc membership fee values by society EDI structure")Postdoc fees for societies with and without EDI structures.
Compare postdoc fee amounts by presence of EDI structures:
#not assuming equal variance using library(rstatix)
stat.test <- dat %>%
# filter(GS_country == "Global North") %>%
t_test(fee_postdoc_USD ~ society_EDI_structure) %>%
add_significance()
stat.test #SIGNIF## # A tibble: 1 × 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 fee_postdoc_USD no yes 14 30 -3.18 28.5 0.00349 **
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 fee_postdoc_USD no yes -1.01 14 30 large
Presence of EDI structures vs student fee discount percentage:
#boxplot with scatters
dat %>%
filter(!is.na(fee_student_pct)) %>%
mutate(society_EDI_structure = fct_relevel(society_EDI_structure, "yes", "no")) %>% #reorder
ggplot(aes(x = society_EDI_structure, y = fee_student_pct, fill = society_EDI_structure)) +
geom_boxplot() +
scale_fill_manual(values = c("#fff7BC", "#888888")) +
geom_boxplot(alpha = 0.3) +
geom_jitter(width = 0.2, alpha = 0.5) +
#scale_y_continuous(labels = scales::percent()) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Student membership fee discount by society EDI structure")Student discount percent with and without EDI structure.
Test for student fee discount percentage by EDI structure:
#not assuming equal variance using library(rstatix)
stat.test <- dat %>%
# filter(GS_country == "Global North") %>%
t_test(fee_student_pct ~ society_EDI_structure) %>%
add_significance()
stat.test #NS## # A tibble: 1 × 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 fee_student_pct no yes 79 59 2.57 136. 0.0113 *
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 fee_student_pct no yes 0.432 79 59 small
Presence of EDI structures vs postdoc fee discount percentage:
#boxplot with scatters for fee_postdoc_pct
dat %>%
filter(!is.na(fee_postdoc_pct)) %>%
mutate(society_EDI_structure = fct_relevel(society_EDI_structure, "yes", "no")) %>% #reorder
ggplot(aes(x = society_EDI_structure, y = fee_postdoc_pct, fill = society_EDI_structure)) +
geom_boxplot() +
scale_fill_manual(values = c("#fff7BC", "#888888")) +
geom_boxplot(alpha = 0.3) +
geom_jitter(width = 0.2, alpha = 0.5) +
#scale_y_continuous(labels = scales::percent()) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "",
title = "Postdoc membership fee discount by society EDI structure")Postdoc discount percent with and without EDI structures.
Test for postdoc fee discount percentage by EDI structure:
#not assuming equal variance using library(rstatix)
stat.test <- dat %>%
# filter(GS_country == "Global North") %>%
t_test(fee_postdoc_pct ~ society_EDI_structure) %>%
add_significance()
stat.test #NS## # A tibble: 1 × 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 fee_postdoc_pct no yes 12 30 -0.922 24.3 0.366 ns
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 fee_postdoc_pct no yes -0.303 12 30 small
Create a single figure with student and postdoc discount levels by EDI structures:
## compare two basic membership discounts fee_student_pct and fee_postdoc_pct by EDI structure:
#prepare data in long format
dat_long2 <- dat %>%
select(society_EDI_structure, fee_student_pct, fee_postdoc_pct) %>%
pivot_longer(cols = c(fee_student_pct, fee_postdoc_pct), names_to = "fee_type", values_to = "fee_value") %>%
mutate(fee_type = ifelse(fee_type == "fee_student_pct", "student", "postdoc")) %>%
mutate(fee_value = fee_value/100) %>% #as proportion
mutate(society_EDI_structure = fct_relevel(society_EDI_structure, "yes", "no")) #reorder
levels(dat_long2$society_EDI_structure) <- c("without EDI structure", "with EDI structure") #rename factor levels
#str(dat_long2)
#boxplot with scatters of jittered points in 2 facets by discount type
plot_EDI_discount_types_percent <- dat_long2 %>%
filter(!is.na(fee_value)) %>% #remove missing values
mutate(fee_type = fct_relevel(fee_type, "student", "postdoc")) %>% #reorder
ggplot(aes(x = society_EDI_structure, y = fee_value, fill = society_EDI_structure)) +
geom_boxplot(alpha = 1) +
geom_jitter(width = 0.1, alpha = 0.5) +
scale_fill_manual(values=c("#4477AA", "#66CCEE")) +
facet_wrap(~fee_type) +
scale_y_continuous(labels = scales::label_percent(), limits = c(0,1)) +
#scale_x_discrete(labels = c("student", "postdoc")) +
theme_minimal() +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "",
y = "discount percentage",
title = "Discount levels for students and postdocs by society EDI structure")
##Presence of EDI structures vs number of discount types per society:
#change labels to se for plot facets
dat$society_EDI_structure2 <- factor(dat$society_EDI_structure, levels = c("no", "yes"),
labels = c("without EDI structure", "with EDI structure"))
#plot
plot_EDI_discount_types_count <- dat %>%
#mutate(text = fct_reorder(text, value)) %>%
ggplot(aes(x = discount_types_count, color = society_EDI_structure, fill = society_EDI_structure)) +
geom_histogram(alpha = 1, binwidth = 1) +
scale_fill_manual(values=c("#4477AA", "#66CCEE")) +
scale_color_manual(values=c("white", "white")) +
scale_x_continuous(breaks = seq(1,7, by = 1)) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(
legend.position = "none",
panel.spacing = unit(0.1, "lines"),
strip.text.x = element_text(size = 8)
) +
labs(title = "Number of discount types per society by society EDI structure",
x = "count of discount types",
y = "count of societies") +
facet_wrap(~society_EDI_structure2)Presence of EDI statement versus amounts and number of types of individual-level discounts.
Figure 4 - combine 2 panels and save:
#assemble the panels using patchwork package
figure4 <- plot_EDI_discount_types_percent / plot_EDI_discount_types_count +
plot_layout(ncol = 1, nrow = 2) +
plot_annotation(tag_levels = "A")
#ggsave(plot = figure4, here("plots", "Fig4AB_v2.png"), width = 18, height = 14, units = "cm", dpi = "retina", scale = 1.2)
#ggsave(plot = figure4, here("plots", "Fig4AB_v2.pdf"), width = 18, height = 14, units = "cm", scale = 1.2)EXTRA - contingency tables by National (by name) and International (not-National by name) societies:
##
## International by aims or scope of activities
## 42
## International by chapter
## 6
## International by chapter, International by aims or scope of activities
## 3
## International by name
## 64
## International by name, International by aims or scope of activities
## 1
## International by name, International by chapter
## 1
## National by name, International by aims or scope of activities
## 45
## National by name, International by chapter
## 4
## National by name, International by chapter, International by aims or scope of activities
## 3
dat$society_NI <- substr(dat$society_type, 1, 1) #Extract first letter
table(dat$society_NI) #52 (31%) national by name##
## I N
## 117 52
##
## I N
## no 75 32
## yes 42 20
##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_structure, dat$society_NI)
## p-value = 0.8629
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5338501 2.3008053
## sample estimates:
## odds ratio
## 1.115339
##
## I N
## no 75 32
## yes 42 20
##
## Fisher's Exact Test for Count Data
##
## data: table(dat$society_EDI_structure, dat$society_NI)
## p-value = 0.8629
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.5338501 2.3008053
## sample estimates:
## odds ratio
## 1.115339
table(dat$individual_waivers, dat$society_NI) #national societies as likely to offer individual waivers as international ones##
## I N
## no 99 45
## yes 18 7
##
## Fisher's Exact Test for Count Data
##
## data: table(dat$individual_waivers, dat$society_NI)
## p-value = 0.8185
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2816479 2.3409812
## sample estimates:
## odds ratio
## 0.8563039
table(dat$countries_paymore, dat$society_NI) #national societies more likely to ask outsiders to pay more##
## I N
## no 111 35
## yes 6 17
##
## Fisher's Exact Test for Count Data
##
## data: table(dat$countries_paymore, dat$society_NI)
## p-value = 5.435e-06
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 3.038744 29.617160
## sample estimates:
## odds ratio
## 8.842602
table(dat$countries_payless, dat$society_NI) #national societies as likely to offer country-level fee discounts ##
## I N
## no 67 30
## yes 50 22
##
## Fisher's Exact Test for Count Data
##
## data: table(dat$countries_payless, dat$society_NI)
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4787918 1.9993118
## sample estimates:
## odds ratio
## 0.9827658
Societies taking voluntary donations:
##
## no yes
## 76 93
What are the tangible benefits of individual society membership (e.g., awards, travel grants, conference fee discounts, journal subscriptions)?
Count types of discounts (benefits_types need to be splitted):
#dat$benefits_types #note some have "" (no info) - replace with "no info", but remember to remove from counting benefits later
table(dat$benefits_types == "") #9 have "" (no info)##
## FALSE TRUE
## 160 9
#dat <- dat %>% mutate(benefits_types == "", "no info", benefits_types) #replace values
#simmplify options by creating separate variables for each type of benefit
dat2 <- dat %>%
mutate(conference = str_detect(benefits_types, "Conference registration discount or waiver"),
funding = str_detect(benefits_types, "Funding"),
journal = str_detect(benefits_types, "Journal subscription discount or waiver"),
APC = str_detect(benefits_types, "Publication fees"),
networking = str_detect(benefits_types, "Networking or professional development"),
other = str_detect(benefits_types, "Other"),
none = str_detect(benefits_types, "[^/s]", negate = TRUE)) #empty ones
#count of each type of benefit (as a sum)
dat2 %>%
select(conference, funding, journal, APC, networking, other, none) %>%
summarise_all(sum) ## conference funding journal APC networking other none
## 1 109 98 118 63 114 48 9
#make a simple bar plot
dat2 %>%
select(conference, funding, journal, APC, networking, other, none) %>%
gather(key = "benefit", value = "presence") %>%
group_by(benefit) %>%
count(presence) %>%
filter(presence == TRUE) %>%
ggplot(aes(x = reorder(benefit, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
lims(y = c(0, 150)) +
coord_flip()Frequencies of member benefits types
Make upset plot to show frequencies of combinations of member benefit types:
#reshape into a column with list of values
dat2 <- dat2 %>%
mutate(benefits_types_list = list(c("conference", "funding", "journal", "APC", "networking", "other", "none"))) #create a list of values
#select columns with society name and individual benefit presence
benefits_wide <- dat2 %>%
select(society, conference, funding, journal, APC, networking, other, none)
#reshape into long tibble
benefits_long <- benefits_wide %>%
as_tibble(rownames = "society") %>%
gather(Benefit, present, -society) %>%
select(-1) %>%
filter(present == TRUE) %>%
select(-3)
#aggregate the society and create a list-column with the benefit information
benefits_list <- benefits_long %>%
group_by(society) %>%
summarize(benefit_types_list = list(Benefit))
#str(benefits_list)
#make upset plot using library(ggupset)
figure5 <- benefits_list %>%
ggplot(aes(x = benefit_types_list)) +
geom_bar(fill = "#21281D",width = 0.8) +
scale_y_continuous(limits = c(0, 30)) +
scale_x_upset(order_by = c("freq")) +
theme_combmatrix(combmatrix.panel.striped_background = FALSE,
combmatrix.panel.point.color.fill = "#21281D",
combmatrix.panel.line.size = 0,
plot.title = element_text(family = "sans", size = 16, face = "plain", color = "#21281D"),
axis.title.x = element_text(family="sans", size = 12, color = "#21281D", face="plain"),
axis.title.y = element_text(family="sans", size = 12, color = "#21281D", face = "plain", vjust = -2),
) +
theme(plot.title = element_text(hjust = 0.5)) +
labs(title = "Combinations of main types of membership benefits",
x = "",
y = "count of societies")
#ggsave(plot = figure5, here("plots", "Fig5.png"), width = 18, height = 8, units = "cm", dpi = "retina", scale = 1.5)
#ggsave(plot = figure5, here("plots", "Fig5.pdf"), width = 18, height = 8, units = "cm", scale = 1.5)Combinations of the main types of membership benefits.
Count types of discounts (discount_types need to be split):
#replace "no fees" with "free option" to avoid confusion with "no fees" in dat$discount_types using gsub
dat$discount_types <- gsub("no fees", "free option", dat$discount_types)
#count how many have NA = no discounts at all (flat fees)
table(is.na(dat$discount_types)) #15##
## FALSE
## 169
#replace NA with "no discounts" to avoid confusion with "no fees"
dat <- dat %>% mutate(discount_types = ifelse(is.na(discount_types), "no discounts", discount_types))
#create new variable with discount types per society as a list
dat <- dat %>% mutate(discount_types_list = str_split(dat$discount_types, pattern = ", "))
#create new variable with counts of discount types per society
dat <- dat %>% mutate(discount_types_count = lengths(discount_types_list))
#simple table of number of discount types per society
table(dat$discount_types_count) #60 (35%) have only one type of discount##
## 1 2 3 4 5 6 7
## 60 43 33 23 6 3 1
#count each discount by type and show percentage of societies
dat %>%
unnest(discount_types_list) %>%
count(discount_types_list) %>%
filter(discount_types_list!= "") %>%
arrange(desc(n)) %>%
mutate(percentage = n/169 * 100) -> discount_types_table
discount_types_table## # A tibble: 14 × 3
## discount_types_list n percentage
## <chr> <int> <dbl>
## 1 student 138 81.7
## 2 retired/emeritus 64 37.9
## 3 postdoc / ECR (excluding students) 41 24.3
## 4 family 34 20.1
## 5 other 32 18.9
## 6 non-academic specialists 18 10.7
## 7 junior 17 10.1
## 8 no discounts 15 8.88
## 9 unemployed 10 5.92
## 10 free option 7 4.14
## 11 general community/public 5 2.96
## 12 employed part-time 4 2.37
## 13 fees proportional to income brackets 4 2.37
## 14 discretionary fee amount 3 1.78
#make a simple bar plot
discount_types_table %>%
filter(discount_types_list!= "") %>%
ggplot(aes(x = reorder(discount_types_list, n), y = n)) +
geom_bar(stat = "identity", position = position_dodge(0.9)) +
geom_text(aes(label = n), hjust = -0.15) +
lims(y = c(0, 150)) +
coord_flip()#dat$discount_types_list <- str_split(dat$discount_types, pattern = ", ") #same as above but modifies original data frameHow many benefit categories per society?
#select columns with society name and individual benefit presence, except "none", and
benefit_types_count <- dat2 %>%
select(conference, funding, journal, APC, networking, other) %>%
rowSums(.)
#add benefit_types_countcolumn to the data frame
dat2 <- dat2 %>% mutate(benefit_types_count = benefit_types_count)
#table of benefits counts frequencies
table(dat2$benefit_types_count)##
## 0 1 2 3 4 5 6
## 9 20 27 33 35 33 12
#table with percentages
round(table(dat2$benefit_types_count)/length(dat2$benefit_types_count)*100,1)##
## 0 1 2 3 4 5 6
## 5.3 11.8 16.0 19.5 20.7 19.5 7.1
#make a simple bar plot
dat2 %>%
#mutate(text = fct_reorder(text, value)) %>%
ggplot(aes(x = benefit_types_count)) +
geom_histogram(alpha = 0.8, binwidth = 1) +
scale_x_continuous(breaks = seq(1,7, by = 1)) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
theme(
legend.position = "none",
panel.spacing = unit(0.1, "lines"),
strip.text.x = element_text(size = 8)
) +
labs(title = "Count of benefit types per society",
x = "count of benefits",
y = "count of societies") Frequencies of numbers of member benefit categories per society.
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Edmonton
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] sjPlot_2.8.15 lme4_1.1-35.1 Matrix_1.6-5 wacolors_0.3.1
## [5] ggpattern_1.1.0-0 tokenizers_0.3.0 stopwords_2.3 tidytext_0.4.1
## [9] ggbeeswarm_0.7.2 patchwork_1.2.0 ggupset_0.3.0 readxl_1.4.3
## [13] scales_1.3.0 ggmosaic_0.3.4 ggcharts_0.2.1 ggimage_0.3.3
## [17] knitr_1.45 DT_0.31 rstatix_0.7.2 countrycode_1.5.0
## [21] here_1.0.1 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [25] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [29] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] rlang_1.1.3 magrittr_2.0.3 compiler_4.3.2 vctrs_0.6.5
## [5] pkgconfig_2.0.3 fastmap_1.1.1 backports_1.4.1 magick_2.8.3
## [9] labeling_0.4.3 utf8_1.2.4 rmarkdown_2.25 tzdb_0.4.0
## [13] nloptr_2.0.3 xfun_0.42 cachem_1.0.8 jsonlite_1.8.8
## [17] highr_0.10 SnowballC_0.7.1 sjmisc_2.8.9 ggeffects_1.4.0
## [21] broom_1.0.5 R6_2.5.1 bslib_0.6.1 stringi_1.8.3
## [25] car_3.1-2 boot_1.3-28.1 estimability_1.5 jquerylib_0.1.4
## [29] cellranger_1.1.0 Rcpp_1.0.12 modelr_0.1.11 pacman_0.5.1
## [33] splines_4.3.2 timechange_0.2.0 tidyselect_1.2.0 rstudioapi_0.15.0
## [37] abind_1.4-5 yaml_2.3.8 sjlabelled_1.2.0 lattice_0.21-9
## [41] plyr_1.8.9 bayestestR_0.13.2 withr_3.0.0 coda_0.19-4.1
## [45] evaluate_0.23 gridGraphics_0.5-1 pillar_1.9.0 carData_3.0-5
## [49] janeaustenr_1.0.0 insight_0.19.10 ggfun_0.1.4 plotly_4.10.4
## [53] generics_0.1.3 rprojroot_2.0.4 productplots_0.1.1 hms_1.1.3
## [57] munsell_0.5.0 minqa_1.2.6 xtable_1.8-4 glue_1.7.0
## [61] emmeans_1.10.0 lazyeval_0.2.2 tools_4.3.2 data.table_1.15.0
## [65] mvtnorm_1.2-4 fs_1.6.3 grid_4.3.2 colorspace_2.1-0
## [69] nlme_3.1-163 performance_0.11.0 beeswarm_0.4.0 vipor_0.4.7
## [73] cli_3.6.2 fansi_1.0.6 viridisLite_0.4.2 sjstats_0.18.2
## [77] gtable_0.3.4 yulab.utils_0.1.4 sass_0.4.8 digest_0.6.34
## [81] ggrepel_0.9.5 ggplotify_0.1.2 farver_2.1.1 htmlwidgets_1.6.4
## [85] memoise_2.0.1 htmltools_0.5.7 lifecycle_1.0.4 httr_1.4.7
## [89] MASS_7.3-60